clear
set more off
**Aaron Flaaen
**August 13, 2013
**Last Updated: July 21, 2014
**This File: builds the CMF-based final/intermediate goods classification,
**  merges it with the monthly LFTTD export and import data, attaches the
**  DCA_LBD firm-level data, and saves the stacked firm-month unit-value files
**  analysisdata_manuf_exp_uv.dta and analysisdata_manuf_imp_uv.dta.
**-------------------------------------------------------------------------

**Move to the working directory held in the global macro dir.
**NOTE(review): dir is not defined in this file, so it is presumably set by
** the caller before this file is run -- confirm.
cd $dir



**--------------------------------------------------------------------------
**Step 1: Create Final/Intermediate Goods Classification
**--------------------------------------------------------------------------

**Step 1.1 Generate 2007 production shares by product (baseroot) within
** industry, at the 8-digit NAICS level and at the 6-, 5- and 4-digit levels

**Base is 8-Digit Industry: a copy sorted by naics_code, because the
** old-style merge in Step 1.2 needs a sorted using file
use cmf_2007_prod_finalgoods.dta, clear
sort naics_code
save cmf_2007_prod_finalgoods_temp.dta, replace

**Collapse by 6-Digit, 5-Digit, 4-Digit Industry, in the event we can't find a
** match at the 8-digit level
forvalues j = 6(-1)4 {
	use cmf_2007_prod_finalgoods.dta, clear
	gen naics_code`j' = substr(naics_code,1,`j')

	**Production of each product within the aggregated industry, and its
	** share of the industry total
	collapse (sum) prod, by(naics_code`j' baseroot)
	bys naics_code`j': egen prod_tot = total(prod)
	gen ratio = prod/prod_tot

	**Label as Final Good if one product occupies more than 10% of Total.
	**A missing ratio (industry total of zero) is excluded explicitly:
	** Stata ranks missing above every number, so the bare comparison
	** ratio>0.10 would flag those products as final goods.
	gen final_pot = 0
	replace final_pot = 1 if ratio>0.10 & !missing(ratio)

	**Number of potential final goods in the industry
	bys naics_code`j': egen num_final_pot = total(final_pot)
	sort naics_code`j'
	save cmf_2007_prod_finalgoods_temp`j'.dta, replace
}


**Step 1.2 Year 2009 2010 and 2011
**For each year: look up the CMF product list of every industry code that
** starts with 3 in the DCA_LBD file, falling back to coarser NAICS levels
** when the full code has no CMF match, then attach the products to the
** firms (alpha) of each industry.
forvalues yr = 2009(1)2011 {

	**Distinct industry codes present in this year's file
	use DCA_LBD_`yr'.dta, clear
	keep naics_code
	duplicates drop
	
	drop if substr(naics_code,1,1)~="3"
	sort naics_code
	merge naics_code using cmf_2007_prod_finalgoods_temp.dta
	tab _m
	
	**If can't find an industry at 8-digit level in CMF, find one at 
	** Less disaggregated level.
	preserve
	keep if _m==3
	drop _m
	save temp_8digit.dta, replace
	restore
	keep if _m==1
	drop _m
	forvalues j = 6(-1)4 {
		keep naics_code
		gen naics_code`j' = substr(naics_code,1,`j')
		**Pair EVERY still-unmatched code with all products of its
		** aggregated industry. The earlier collapse (first) kept only one
		** code per prefix, so sibling codes sharing the prefix lost their
		** match and their firms were dropped by the joinby further down.
		joinby naics_code`j' using cmf_2007_prod_finalgoods_temp`j'.dta, unmatched(master)
		tab _merge
		**Matched codes are set aside; unmatched codes go on to the next,
		** coarser level
		preserve
		keep if _merge==3
		drop _merge
		save temp_`j'digit.dta, replace
		restore
		keep if _merge==1
		drop _merge
		drop naics_code`j'
	}
	**Codes unmatched at every level stay in memory; stack the matched sets
	append using temp_4digit.dta
	append using temp_5digit.dta
	append using temp_6digit.dta
	append using temp_8digit.dta
	
	*keep naics_code prod prod_tot ratio final_pot num_final_pot alpha
	drop naics_code6 naics_code5 naics_code4
	sort naics_code
	save ind_prod_temp.dta, replace

	**Firm list for the same set of industries
	use DCA_LBD_`yr'.dta, clear
	keep alpha naics_code
	drop if substr(naics_code,1,1)~="3"
	sort naics_code
	
	**Cartesian Product!!
	**Every firm is paired with every product row of its industry
	joinby naics_code using ind_prod_temp.dta
		
	sort alpha baseroot
	save DCA_LBD_`yr'_intprod.dta, replace
}

**Remove the temporary files created in Step 1
foreach lev in 4 5 6 8 {
	erase temp_`lev'digit.dta
}
erase ind_prod_temp.dta
**The sorted 8-digit copy from Step 1.1 was previously left on disk; it is
** only read inside the Step 1.2 loop
erase cmf_2007_prod_finalgoods_temp.dta
forvalues j = 6(-1)4 {
	erase cmf_2007_prod_finalgoods_temp`j'.dta
}





**--------------------------------------------------------------------------
**Step 4: Bring in CMF-Products File and create Int/Final Distinctions
**--------------------------------------------------------------------------

**Step 4.1 Prep LFTTD Data
**For every year and flow (exp, imp): uncompress the monthly file, drop
** unusable quantity records, aggregate values and quantities by
** firm-product-hs-country-month, and save both the aggregated file and the
** list of distinct firm-product pairs.
forvalues yr = 2009(1)2012 {
	foreach flow in exp imp {
		!gunzip `flow'_mon_`yr'.dta.gz
		use `flow'_mon_`yr'.dta, clear
		rename naics baseroot

		**Remove imputed, converted, or zero/missing quantities
		drop if qty1_imp~="" | qty1_con~="" | qty1_zero==1

		**0/1 country indicators
		gen jpn = (country=="xxxx")
		gen na = (country=="xxxx" | country=="xxxx")
		gen njpn = (country~="xxxx")

		collapse (sum) relvalue nonrelvalue relqty nonrelqty (firstnm) jpn na njpn, by(firmid baseroot hs country month)
		save `flow'_monthly_`yr'_temp.dta, replace

		**Distinct firm-product pairs that appear in this flow and year
		keep firmid baseroot
		duplicates drop
		save `flow'_`yr'_temp.dta, replace
	}
}
	
	


**Step 4.2: Merge Together and Collapse by Type
**For every year and flow: attach the final/intermediate classification to
** the firm-product pairs that trade, merge in the monthly trade records,
** flag intermediates and compute unit values.
forvalues i = 2009(1)2012 {
	**Uncompress the classification file once per year; it is read by both
	** the exp and the imp pass
	!gunzip dca_lbd_`i'_intprod.dta.gz
	foreach ddd in exp imp {
		use dca_lbd_`i'_intprod.dta, clear
		merge m:1 firmid baseroot using `ddd'_`i'_temp
		tab _m
		**Drop classification rows with no trade in this flow
		drop if _m==1
		drop _m
		**Traded pairs without a classification count as not final
		replace final_pot = 0 if final_pot==.
		replace ratio = 0 if ratio==.
		
		merge 1:m firmid baseroot using `ddd'_monthly_`i'_temp.dta
		
		**Intermediate = classified as not a potential final good
		gen intm = 0
		replace intm = 1 if final_pot==0
		
		drop final_pot
		
		**Unit values for related-party and non-related-party trade
		gen rel_uv = relvalue / relqty
		gen nonrel_uv = nonrelvalue / nonrelqty
		
		save mon_`ddd'_`i'_temp2.dta, replace
	}
	**Re-compress the classification file. The command has to name the
	** uncompressed .dta file: gzip leaves a file that already ends in .gz
	** unchanged, so the earlier call on the .dta.gz name never compressed it.
	!gzip dca_lbd_`i'_intprod.dta
}



**Step 4.3 Clean up Unnecessary Files
**Delete the two Step 4.1 temp files of each flow and year, then
** re-compress the monthly trade file
foreach flow in exp imp {
	forvalues yr = 2009/2012 {
		foreach stub in monthly_`yr' `yr' {
			erase `flow'_`stub'_temp.dta
		}
		!gzip `flow'_mon_`yr'.dta
	}
}



**--------------------------------------------------------------------------
**Step 5: Saturate, Create Other variables
**--------------------------------------------------------------------------

************************************************************************************
**AF: 08/30: NOT GOING TO SATURATE UVS
************************************************************************************

/*
**4.1 Saturate the Monthly Panel
forvalues j = 2009(1)2012 {
	use mon_exp_`j'_temp2.dta, clear
	keep firmid year
	duplicates drop
	save replicate_`j'.dta, replace
	gen month = 1
	compress
	forvalues i = 2(1)12 {
		append using replicate_`j'.dta
		replace month= `i' if month==.
	}
	sort firmid month
	merge firmid month using mon_exp_`j'_temp2.dta
	drop _m
	save mon_exp_`j'_temp3.dta, replace
	erase mon_exp_`j'_temp2.dta
	erase replicate_`j'.dta

	use mon_imp_`j'_temp2.dta, clear
	keep firmid year
	duplicates drop
	save replicate_`j'.dta, replace
	gen month = 1
	compress
	forvalues i = 2(1)12 {
		append using replicate_`j'.dta
		replace month= `i' if month==.
	}
	sort firmid month
	merge firmid month using mon_imp_`j'_temp2.dta
	drop _m
	save mon_imp_`j'_temp3.dta, replace
	erase mon_imp_`j'_temp2.dta
	erase replicate_`j'.dta
}


**4.2 Generate extra variables (collapsing int/fin rel/nrel distinctions)
forvalues j = 2009(1)2012 {
	use mon_exp_`j'_temp2.dta, clear 
	
	
	
	
	
	
	save mon_exp_`j'_temp4.dta, replace 
	erase mon_exp_`j'_temp3.dta
}

forvalues j = 2009(1)2012 {
	use mon_imp_`j'_temp2.dta, clear 
	
	
	
	*drop naimp_rel_fin naimp_rel_int naimp_nrel_fin naimp_nrel_int
	
	
	save mon_imp_`j'_temp4.dta, replace 
	erase mon_imp_`j'_temp3.dta
}

*/


**Give each year's exp and imp temp2 file its clean_uv name: make year
** numeric when possible, sort by firm and month, save, delete the temp2 file
forvalues yr = 2009/2012 {
	foreach flow in exp imp {
		use mon_`flow'_`yr'_temp2.dta, clear
		capture destring year, replace
		sort firmid month
		save `flow'_firm`yr'monthly_clean_uv.dta, replace
		erase mon_`flow'_`yr'_temp2.dta
	}
}

**--------------------------------------------------------------------------
**Step 5: Create Dataset of Monthly Exports, Imports and Firm-Ownership
**--------------------------------------------------------------------------

**-----------------------------------------------------
**5.1 Prep DCA_LBD Data
forvalues yr = 2009/2012 {

	!gunzip lbd_dca_`yr'_firm.dta.gz
	use lbd_dca_`yr'_firm.dta, clear

	*Reduce Variables
	*drop flag_for_mult
	**0/1 indicator built from ifscode
	gen japan = (ifscode==xxx)
	
	compress

	**Reduce Sample
	**Either: Keep only if emp>xx
	*drop if emp<xx
	**Or: Keep only Manufacturing (selected on the first two digits of naics_code)
	keep if inlist(substr(naics_code,1,2),"xx","xx","xx")

	save dca_lbd_`yr'_temp.dta, replace
	**Create Monthly Copies
	**Month 1 is the data in memory; each appended copy arrives with month
	** missing and is stamped with the next month number
	gen month = 1
	compress
	forvalues m = 2/12 {
		append using dca_lbd_`yr'_temp.dta
		replace month = `m' if missing(month)
	}
	sort firmid month
	save dca_lbd_`yr'_month.dta, replace


	**AF 08/30
	**DIFFERENT MERGING WITH TRADE DATA
	**WILL DO IT SEPARATELY FOR EXP AND IMP
	foreach flow in exp imp {
		use `flow'_firm`yr'monthly_clean_uv.dta, clear
		compress
		capture drop _m
		merge m:1 firmid month using dca_lbd_`yr'_month.dta
		tab _m
		**_m==1 ==> Non-Manufacturing Firms that Trade
		**_m==2 ==> Manufacturing Firms that Don't Trade
		**_m==3 ==> Manufacturing Firms that Trade
		**NOW ONLY LOOKING AT THOSE THAT TRADE
		keep if _m==3
		drop _m
		**Replace whatever year variable came along with the loop year
		capture drop year
		gen year = `yr'
		compress
		save DCA_LBD_LFTTD_`yr'_month_`flow'_uv.dta, replace
	}
	
	**clean up
	!gzip lbd_dca_`yr'_firm.dta
	erase imp_firm`yr'monthly_clean_uv.dta
	erase exp_firm`yr'monthly_clean_uv.dta
	
	
}
**-----------------------------------------------------

**Stack the yearly export files into one dataset.
**The clear option lets this section be run, or re-run, when unsaved data
** are in memory; without it use stops with an error in that case.
use DCA_LBD_LFTTD_2009_month_exp_uv.dta, clear
**5.4 Stack 2010 and 2011 (and 2009?)
append using DCA_LBD_LFTTD_2010_month_exp_uv.dta
append using DCA_LBD_LFTTD_2011_month_exp_uv.dta

/*CAN"T DO THIS EITHER */
/*
**Only keep if the firm is present in 2009, 2010, and 2011
bys firmid: gen copies = _N
tab copies
drop if copies<xx
drop copies
compress
*/

**Now add 2012?
append using DCA_LBD_LFTTD_2012_month_exp_uv.dta



**Fixes to Firm-Level Indicators
**NOTE(review): the four closing braces in this section have no matching loop
** header in this file, and the local macro fname is never defined here.
** Presumably the foreach headers listing the firm ids were removed;
** TODO restore them before running this section.
**For These: Enforce all indicators to be japanese, foreign mult

	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if  firmid=="`fname'"
}

	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if firmid=="`fname'"
}


	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if firmid=="`fname'"
}

**For These: Enforce all indicators to be non-japanese

	qui replace japan=0 if firmid=="`fname'"
}

**Direct overrides of the multinational flags.
**NOTE(review): every condition below compares firmid with the empty string,
** so all lines act on the same observations and a later line overrides an
** earlier one for the same flag. Presumably the firm ids were blanked out;
** TODO confirm and restore.
replace flag_for_mult=1 if firmid == ""
replace flag_for_mult=0 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""

**For the rest with disagreements: replace as non-jpn
**A firm whose rows carry different japan values gets japan set to 0 on
** every row that is above the firm minimum
tempvar lowjpn
bysort firmid: egen `lowjpn' = min(japan)
replace japan = 0 if japan != `lowjpn'
drop `lowjpn'

**Save the stacked export file, then compress it on disk
save analysisdata_manuf_exp_uv.dta, replace
!gzip analysisdata_manuf_exp_uv.dta


**REPEAT FOR IMPORTS
**Stack the yearly import files into one dataset.
**The clear option lets this section be run, or re-run, when unsaved data
** are in memory; without it use stops with an error in that case.
use DCA_LBD_LFTTD_2009_month_imp_uv.dta, clear
**5.4 Stack 2010 and 2011 (and 2009?)
append using DCA_LBD_LFTTD_2010_month_imp_uv.dta
append using DCA_LBD_LFTTD_2011_month_imp_uv.dta

/*CAN"T DO THIS EITHER */
/*
**Only keep if the firm is present in 2009, 2010, and 2011
bys firmid: gen copies = _N
tab copies
drop if copies<xx
drop copies
compress
*/

**Now add 2012?
append using DCA_LBD_LFTTD_2012_month_imp_uv.dta



**Fixes to Firm-Level Indicators
**NOTE(review): the four closing braces in this section have no matching loop
** header in this file, and the local macro fname is never defined here.
** Presumably the foreach headers listing the firm ids were removed;
** TODO restore them before running this section.
**For These: Enforce all indicators to be japanese, foreign mult

	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if  firmid=="`fname'"
}

	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if firmid=="`fname'"
}


	qui replace japan = 1 if firmid=="`fname'"
	qui replace flag_us_mult=0 if firmid=="`fname'"
	qui replace flag_for_mult=1 if firmid=="`fname'"
}

**For These: Enforce all indicators to be non-japanese

	qui replace japan=0 if firmid=="`fname'"
}

**Direct overrides of the multinational flags.
**NOTE(review): every condition below compares firmid with the empty string,
** so all lines act on the same observations and a later line overrides an
** earlier one for the same flag. Presumably the firm ids were blanked out;
** TODO confirm and restore.
replace flag_for_mult=1 if firmid == ""
replace flag_for_mult=0 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_for_mult=0 if firmid==""
replace flag_us_mult=1 if firmid==""
replace flag_for_mult=0 if firmid==""

**For the rest with disagreements: replace as non-jpn
**A firm whose rows carry different japan values gets japan set to 0 on
** every row that is above the firm minimum
tempvar lowjpn
bysort firmid: egen `lowjpn' = min(japan)
replace japan = 0 if japan != `lowjpn'
drop `lowjpn'


**Save the stacked import file, then compress it on disk
save analysisdata_manuf_imp_uv.dta, replace
!gzip analysisdata_manuf_imp_uv.dta

**--------------------------------------------------------------------------
**Step 5: Bring in Uniworld Data, do some checks
**--------------------------------------------------------------------------


**Delete the yearly intermediate files written in Step 5.1:
** the firm-level temp files, their monthly copies, and the yearly
** export and import merge results
forvalues yr = 2009/2012 {
	erase dca_lbd_`yr'_temp.dta
}

forvalues yr = 2009/2012 {
	erase dca_lbd_`yr'_month.dta
}

foreach flow in exp imp {
	forvalues yr = 2009/2012 {
		erase DCA_LBD_LFTTD_`yr'_month_`flow'_uv.dta
	}
}

